/*==============================================================================
FILE NAME: Figure_C3.do
CREATED: 25 July 2025
==============================================================================*/


/* Set directory and define the global paths for the replication package
   if working independently through code
if c(username)=="" { //insert username
	global rootdir "" // insert root path
	global processed_data "$rootdir/processed_data" 
	global figures "$rootdir/output/figures"
	global point_estimates "" // insert folder for the point-estimate CSV written by this file
} 
*/

**Figure C3
** Build one record per (RN, CIN) pair from the cleaned incident data.
** Note: use takes the clear option to discard the data in memory; replace is
** not a valid option for use and stops the do-file with an error.
use "$processed_data/incidents_clean.dta", clear
** can add back in date and location information if needed **
keep RN CIN IncidentDescription IncidentStatus Number_Of_Complaints incident_air incident_water IncidentNature IncidentEffect incident_waste InvestigationStartDate County_CIN ZipCode_CIN ZipFlag_CIN year
** Remove rows that are identical on every retained variable
duplicates drop
** ID indexes each distinct (RN, CIN) combination
egen ID = group(RN CIN)
sort ID
** count = number of remaining rows that share the same ID
bys ID: egen count = count(ID)
** Keep a single row per ID; force is needed because the other variables may
** still differ across rows with the same ID
duplicates drop ID, force
** Rebuild ID on the de-duplicated data
drop ID
egen ID = group(RN CIN)
save "$processed_data/unique_complaints.dta", replace
use "$processed_data/unique_complaints.dta", clear

// Clean the free-text complaint description: strip punctuation, digits and
// line breaks, then lower-case it and drop records left with no text.
//
// Characters are addressed by ASCII code through char() rather than typed as
// literals, so that the dollar sign, the left single quote and the backslash
// (all of which take part in Stata macro expansion) never appear inside a
// quoted string. The codes removed are:
//   10, 13   line feed and carriage return
//   33       exclamation mark
//   35-64    every character from the hash sign through the at sign, which
//            covers the remaining punctuation in that range and the digits 0-9
//   92       backslash
//   94-96    caret, underscore and left single quote
//   126      tilde
// The double quote (34), brackets, braces, the pipe and whitespace are not
// removed. Removing single characters is order-independent, so this matches
// the result of removing them one literal at a time.
foreach code of numlist 10 13 33 35/64 92 94/96 126 {
	replace IncidentDescription = subinstr(IncidentDescription, char(`code'), "", .)
}
replace IncidentDescription = lower(IncidentDescription)
drop if IncidentDescription == ""
// Build 0/1 indicator variables from IncidentNature (exact match) and
// IncidentEffect (substring match), then save the cleaned file.
// Each indicator is generated directly from its logical expression, which
// evaluates to 1 when true and 0 otherwise.

// Nature of incident: air-type categories
gen ODOR = (IncidentNature == "ODOR")
gen DUST = (IncidentNature == "DUST")
gen SMOKE = (IncidentNature == "SMOKE")
gen OUTDOOR_BURNING = (IncidentNature == "OUTDOOR BURNING")
gen OTHER_AIR = (SMOKE == 0 & OUTDOOR_BURNING == 0 & ODOR == 0 & DUST == 0)

// Nature of incident: waste-type categories
gen MUNICIPAL = (IncidentNature == "MUNICIPAL NON-INDUSTRIAL" | IncidentNature == "MUNICIPAL - DO NOT USE AFTER 04/04")
gen INDUSTRIAL = (IncidentNature == "INDUSTRIAL")
gen PST = (IncidentNature == "PST")
// NOTE(review): this rule conditions on ODOR rather than only on the waste
// categories above; kept as written, confirm that this is intended.
gen OTHER_WASTE = (MUNICIPAL == 0 & INDUSTRIAL == 0 & ODOR == 0 & PST == 0)

// Nature of incident: water-type categories
gen WASTEWATER = (IncidentNature == "WASTEWATER")
gen WATER_SUPPLY_QUALITY = (IncidentNature == "WATER SUPPLY QUALITY")
gen STORMWATER = (IncidentNature == "STORMWATER")
gen WATER_SUPPLY_SERVICE = (IncidentNature == "WATER SUPPLY SERVICE")
// The full variable name WASTEWATER is spelled out here so the line does not
// depend on variable-name abbreviation being switched on.
gen OTHER_WATER = (WASTEWATER == 0 & WATER_SUPPLY_QUALITY == 0 & STORMWATER == 0 & WATER_SUPPLY_SERVICE == 0)

// Reported effect: strpos() returns 0 when the substring is absent
gen HEALTH = (strpos(IncidentEffect, "HEALTH") > 0)
gen PROPERTY = (strpos(IncidentEffect, "PROPERTY") > 0)
gen OIL_GAS_SHALE = (strpos(IncidentEffect, "OIL AND GAS") > 0 | ///
	strpos(IncidentEffect, "EAGLE FORD SHALE") > 0 | ///
	strpos(IncidentEffect, "BARNETT SHALE") > 0)
gen OTHER = (HEALTH == 0 & PROPERTY == 0 & OIL_GAS_SHALE == 0)

save "$processed_data/unique_complaints_clean.dta", replace

// Reload the cleaned complaints and restrict the sample to 2003-2019.
// inrange() is false for a missing year, so records with no year are dropped.
use "$processed_data/unique_complaints_clean.dta", clear
keep if inrange(year, 2003, 2019)

// Write two tab-delimited text extracts of the descriptions, then return to
// the full data set. Both files are written to the current working directory.
preserve
	// Extract 1: description with its identifiers and year
	keep IncidentDescription RN CIN ID year
	export delimited using "complaints_with_year.txt", delimiter(tab) replace

	// Extract 2: description and ID only
	keep IncidentDescription ID
	export delimited using "complaints.txt", delimiter(tab) replace
restore


// COMPLAINT COUNT FIGURE
rename Number_Of_Complaints Number_Of_Air_Complaints

// Point estimates behind the figure: number and share of records with
// exactly 1, 2, 3, 4 or 5 complaints. The denominator is every record whose
// complaint count is above 0 and at most 5.
quietly count if Number_Of_Air_Complaints > 0 & Number_Of_Air_Complaints <= 5
local n_all = r(N)

forvalues k = 1/5 {
    quietly count if Number_Of_Air_Complaints == `k'
    local n_`k' = r(N)
    // share in percent, rounded to one decimal place
    local share_`k' = round((`n_`k''/`n_all')*100, 0.1)
}

// Assemble a six-row table (rows 1-5 = complaint counts, row 6 = total,
// labelled 999) in a scratch data set and write it to CSV; the complaint
// data are restored afterwards.
preserve
clear
set obs 6
gen complaint_count = cond(_n <= 5, _n, 999)
gen frequency = .
gen percentage = .

forvalues k = 1/5 {
    replace frequency = `n_`k'' in `k'
    replace percentage = `share_`k'' in `k'
}

// Total row
replace frequency = `n_all' in 6
replace percentage = 100 in 6

export delimited using "$point_estimates/Point_Estimates_Figure_C3.csv", replace
restore
// Figure C3: percent histogram of the number of complaints per record,
// restricted to records with more than 0 and at most 5 complaints, exported
// as a PDF.
graph set window fontface "Times New Roman"
histogram Number_Of_Air_Complaints ///
	if Number_Of_Air_Complaints > 0 & Number_Of_Air_Complaints <= 5, ///
	discrete start(1) width(1) percent bcolor(navy) ///
	xlabel(1(1)5, nogrid labsize(vlarge)) ///
	ylabel(, labsize(vlarge)) ///
	xtitle("Number of Complaints Received", size(vlarge)) ///
	ytitle("Percent (%)", size(vlarge))
graph export "$figures/Figure_C3.pdf", replace